knitr::opts_chunk$set(echo = TRUE)
suppressMessages(library(Seurat))
suppressMessages(library(ggplot2))
## Warning: package 'ggplot2' was built under R version 4.2.3
suppressMessages(library(gridExtra))
suppressMessages(library(viridis))
suppressMessages(library(stringr))
suppressMessages(library(randomcoloR))
suppressMessages(library(reshape2))
suppressMessages(library(dplyr))
suppressMessages(library(tidyverse))
suppressMessages(library(rlang))
## Warning: package 'rlang' was built under R version 4.2.3
Here we are going explore customizable R codes that can be used to making decent graphical visualization for single cell datasets. Seurat package will be used to load and analyse a 10X single cell dataset of sequenced from colorectal cancer patients. Cell-type identity will be followed as described by the authors of the dataset.
The dataset used for this illustration is downloaded form:
Unfiltered dataset were downloaded and used here.
The data was published by Sathe et. al in the journal of Clinical Cancer Research.
Titled: Colorectal Cancer Metastases in the Liver Establish Immunosuppressive Spatial Networking between Tumor-Associated SPP1+ Macrophages and Fibroblasts
DOI: https://doi.org/10.1158/1078-0432.CCR-22-2041
According to the description of the provided by the authors, the Cell labels file contains cell lineage annotation for each single cell following subset analysis as described in the manuscript.
Column identifiers in the file:
- ‘cell_barcode’: original cell
barcode appended with a prefix corresponding to orig.ident described
below.
- ‘orig.ident’: Sample name in the “patientID_condition”
format described above.
- ‘condition’: normal/tumor/pbmc
-
‘final_celltype’: cell lineage assigned following subset analysis with
re-clustering
The data consist of 7 metastasized colorectal cancer (mCRC) from the liver, 4 normal liver sample and 2 Peripheral Blood Mononuclear Cells (PBMC) samples.
n5784_PBMC = Read10X('sathe_et_al/mCRC_scRNA_raw/5784_PBMC/')
n6335_PBMC = Read10X('sathe_et_al/mCRC_scRNA_raw/6335_PBMC/')
n6198_normal_liver = Read10X('sathe_et_al/mCRC_scRNA_raw/6198_normal_liver/')
n5915_normal_liver = Read10X('sathe_et_al/mCRC_scRNA_raw/5915_normal_liver/')
n6335_normal_liver = Read10X('sathe_et_al/mCRC_scRNA_raw/6335_normal_liver/')
n6648_normal_liver = Read10X('sathe_et_al/mCRC_scRNA_raw/6648_normal_liver/')
n5784_mCRC = Read10X('sathe_et_al/mCRC_scRNA_raw/5784_mCRC/')
n5915_mCRC = Read10X('sathe_et_al/mCRC_scRNA_raw/5915_mCRC/')
n6198_mCRC = Read10X('sathe_et_al/mCRC_scRNA_raw/6198_mCRC/')
n6335_mCRC = Read10X('sathe_et_al/mCRC_scRNA_raw/6335_mCRC/')
n6593_mCRC = Read10X('sathe_et_al/mCRC_scRNA_raw/6593_mCRC/')
n6648_mCRC = Read10X('sathe_et_al/mCRC_scRNA_raw/6648_mCRC/')
n8640_mCRC = Read10X('sathe_et_al/mCRC_scRNA_raw/8640_mCRC/')
# creating seurat objects
n5784_PBMC = CreateSeuratObject(n5784_PBMC,min.cells = 3, min.genes = 200, min.features=10,project = "5784_PBMC")
n6335_PBMC = CreateSeuratObject(n6335_PBMC,min.cells = 3, min.genes = 200, min.features=10,project = "6335_PBMC")
n5915_normal_liver = CreateSeuratObject(n5915_normal_liver,min.cells = 3, min.genes = 200, min.features=10,project = "5915_normal_liver")
n6198_normal_liver = CreateSeuratObject(n6198_normal_liver,min.cells = 3, min.genes = 200, min.features=10,project = "6198_normal_liver")
n6335_normal_liver = CreateSeuratObject(n6335_normal_liver,min.cells = 3, min.genes = 200, min.features=10,project = "6335_normal_liver")
n6648_normal_liver = CreateSeuratObject(n6648_normal_liver,min.cells = 3, min.genes = 200, min.features=10,project = "6648_normal_liver")
n5784_mCRC = CreateSeuratObject(n5784_mCRC,min.cells = 3, min.genes = 200, min.features=10,project = "5784_mCRC")
n5915_mCRC = CreateSeuratObject(n5915_mCRC,min.cells = 3, min.genes = 200, min.features=10,project = "5915_mCRC")
n6198_mCRC = CreateSeuratObject(n6198_mCRC,min.cells = 3, min.genes = 200, min.features=10,project = "6198_mCRC")
n6335_mCRC = CreateSeuratObject(n6335_mCRC,min.cells = 3, min.genes = 200, min.features=10,project = "6335_mCRC")
n6593_mCRC = CreateSeuratObject(n6593_mCRC,min.cells = 3, min.genes = 200, min.features=10,project = "6593_mCRC")
n6648_mCRC = CreateSeuratObject(n6648_mCRC,min.cells = 3, min.genes = 200, min.features=10,project = "6648_mCRC")
n8640_mCRC = CreateSeuratObject(n8640_mCRC,min.cells = 3, min.genes = 200, min.features=10,project = "8640_mCRC")
seu_object = list(n5784_PBMC,n5784_mCRC,n5915_mCRC,n5915_normal_liver,n6198_mCRC,n6198_normal_liver,n6335_PBMC,n6335_mCRC,n6335_normal_liver,n6593_mCRC,n6648_mCRC,n6648_normal_liver,n8640_mCRC)
cell_counts <- c()
sample_name <- c()
features <- c()
for (i in 1:length(seu_object))
{
cell_counts <- c(cell_counts,dim(seu_object[[i]]@assays$RNA@counts)[2])
features <- c(features,dim(seu_object[[i]]@assays$RNA@counts)[1])
sample_name <- c(sample_name,seu_object[[i]]@project.name)
}
df <- data.frame("Sample Name"=sample_name,"Cell Count"=cell_counts,"Total Features"=features)
rmarkdown::paged_table(df,options = list(rows.print = 7, rownames.print=T))
index = which.max(features)
sample_name_order <- c(df$Sample.Name[index])
for (i in 1:length(df$Sample.Name))
{
if (i != index)
{
sample_name_order <- c(sample_name_order,df$Sample.Name[i])
}
}
crc_merged <- merge(seu_object[[5]], y= c(seu_object[[1]],seu_object[[2]],seu_object[[3]],seu_object[[4]],seu_object[[6]],seu_object[[7]],seu_object[[8]],seu_object[[9]],seu_object[[10]],seu_object[[11]],seu_object[[12]],seu_object[[13]]),add.cell.ids=sample_name_order,project = "CRCLM")
rm(n5784_PBMC,n5784_mCRC,n5915_mCRC,n5915_normal_liver,n6198_mCRC,n6198_normal_liver,n6335_PBMC,n6335_mCRC,n6335_normal_liver,n6593_mCRC,n6648_mCRC,n6648_normal_liver,n8640_mCRC)
saveRDS(crc_merged,"sathe_et_al/merged_crc.rds")
metadata = read.csv('sathe_et_al/mCRC_scRNA_raw/mCRC_cell_labels.csv')
# calculating the percentage mt RNA using seurat inbuild function
crc_merged$percent.mt <- PercentageFeatureSet(crc_merged, pattern = "^MT-")
# setting the order of Y axis
label_order = c("5915_normal_liver","6198_normal_liver","6335_normal_liver","6648_normal_liver",
"5784_mCRC","5915_mCRC","6198_mCRC","6335_mCRC","6593_mCRC","6648_mCRC","8640_mCRC",
"5784_PBMC","6335_PBMC")
Idents(crc_merged) <- 'orig.ident'
p <- VlnPlot(crc_merged,features = c('percent.mt'))
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
p <- p + scale_y_continuous(breaks=seq(0,100,by=5))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p$data$ident <- factor(p$data$ident, levels = label_order) # pushing the sort order into the plot
p <- p + theme(legend.position="none")
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5,size=15),axis.text.y = element_text(size=20))
p
Idents(crc_merged) <- 'orig.ident'
p <- VlnPlot(crc_merged,features = c('nFeature_RNA'),raster = F,pt.size = 0.5)
p <- p + scale_y_continuous(breaks=seq(0,60000,by=500))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p$data$ident <- factor(p$data$ident, levels = label_order) # pushing the sort order into the plot
p <- p + theme(legend.position="none")
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5,size=15),axis.text.y = element_text(size=20))
p
p <- VlnPlot(crc_merged,features = c('nFeature_RNA'),raster = F,pt.size = 0)
p <- p + scale_y_continuous(breaks=seq(0,60000,by=500))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p$data$ident <- factor(p$data$ident, levels = label_order) # pushing the sort order into the plot
p <- p + theme(legend.position="none")
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5,size=15),axis.text.y = element_text(size=20))
p
Idents(crc_merged) <- 'orig.ident'
p <- VlnPlot(crc_merged,features = c('nCount_RNA'),raster = F,pt.size = 0.5)
p <- p + scale_y_continuous(breaks=seq(0,100000,by=3000))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p$data$ident <- factor(p$data$ident, levels = label_order) # pushing the sort order into the plot
p <- p + theme(legend.position="none")
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5,size=15),axis.text.y = element_text(size=20))
p
Idents(crc_merged) <- 'orig.ident'
p <- VlnPlot(crc_merged,features = c('nCount_RNA'),raster = F,pt.size = 0)
p <- p + scale_y_continuous(breaks=seq(0,100000,by=3000))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p$data$ident <- factor(p$data$ident, levels = label_order) # pushing the sort order into the plot
p <- p + theme(legend.position="none")
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5,size=15),axis.text.y = element_text(size=20))
p
#Filtering cells - We will filter low quality cells for this analysis - To make things easy we shall remove cells which have genes lower than 200 and above 6000 - We shall also remove cells with high mitochondial DNA - We shall make a scatter plot to high the cells we are going to keep in green.
p <- FeatureScatter(crc_merged,feature1 = "nFeature_RNA",feature2 = "percent.mt",pt.size = 0.5,group.by = 'orig.ident')
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
p <- p + theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1))
p <- p + scale_y_continuous(breaks=seq(0,100,by=5))
p <- p + scale_x_continuous(breaks=seq(0,60000,by=500))
#Cells to remove
p <- p + annotate("rect",xmin = -2,xmax=200,ymin=-0.5,ymax=20,fill = "brown4", alpha = 0.5)
p <- p + annotate("rect",xmin = -2,xmax=9000,ymin=20,ymax=101,fill = "brown4", alpha = 0.5)
p <- p + annotate("rect",xmin = 6000,xmax=9000,ymin=-0.5,ymax=20,fill = "brown4", alpha = 0.5)
p <- p + annotate("rect",xmin = 200,xmax=500,ymin=5,ymax=20,fill = "brown4", alpha = 0.5)
#Cells to keep
p <- p + annotate("rect",xmin = 500,xmax=6000,ymin=-0.5,ymax=20,fill = "chartreuse4", alpha = 0.5)
p <- p + annotate("rect",xmin = 200,xmax=500,ymin=-0.5,ymax=5,fill = "chartreuse4", alpha = 0.5)
p
memory.limit(size = 500000)
[1] Inf
crc_merged<- subset(crc_merged, subset = (nFeature_RNA > 200 & percent.mt < 20 ))
crc_merged<- subset(crc_merged, subset = (nFeature_RNA < 6000))
crc_merged <- NormalizeData(crc_merged, normalization.method = "LogNormalize", scale.factor = 10000)
saveRDS(crc_merged,"sathe_et_al/merged_crc_normalized.rds")
crc_merged <- FindVariableFeatures(crc_merged, selection.method = "vst", nfeatures = 1500)
top_genes <- head(VariableFeatures(crc_merged), 50)
plot1 <- VariableFeaturePlot(crc_merged)
plot2 <- LabelPoints(plot = plot1, points = top_genes, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2 + theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5,size=15),
axis.text.y = element_text(size=20))
memory.limit(size = 500000)
## [1] Inf
all_genes <- rownames(data)
crc_merged <- ScaleData(crc_merged, verbose = T, features = all_genes)
## Centering and scaling data matrix
crc_merged <- ScaleData(crc_merged, verbose = T, vars.to.regress = "percent.mt")
## Regressing out percent.mt
## Centering and scaling data matrix
crc_merged<- RunPCA(crc_merged, features = VariableFeatures(object = crc_merged), npcs = 50, verbose = T)
## PC_ 1
## Positive: FBLN1, SFN, ASPSCR1, SOX4, CALCB, CELF4, MDK, CLDN4, HES6, EPCAM
## SLC1A5, HSPB1, STMN1, DEFA6, CNPY1, TACSTD2, CKB, DEFA5, NOTUM, VGF
## GADD45A, BAMBI, ID3, PMAIP1, ELF3, CHGB, CST1, UBE2S, MYH7B, LHX1
## Negative: S100A4, TYROBP, CTSS, S100A9, S100A8, LST1, VIM, LYZ, FCER1G, NEAT1
## AIF1, CD74, FCN1, S100A12, HLA-DRA, CYBB, MNDA, HBB, CCL4, CSTA
## CD163, FPR1, CSF3R, AC020656.1, MPEG1, SELL, SLC11A1, HLA-DRB1, ACSL1, HLA-DPB1
## PC_ 2
## Positive: ASPSCR1, CALCB, FBLN1, DEFA6, SFN, CELF4, CNPY1, DEFA5, HES6, SLC1A5
## NOTUM, CHGB, VGF, MYH7B, KLK12, SCGB1D2, LHX1, DWORF, STMN1, GZMA
## ASCL2, TACSTD2, EPCAM, CST1, LINC00867, UBE2S, CLDN4, BEX5, SP5, C12orf75
## Negative: CALD1, MGP, SPARC, COL4A1, COL3A1, COL1A2, COL4A2, FN1, COL1A1, TAGLN
## AEBP1, BGN, DCN, CAVIN1, TIMP3, IGFBP7, COL5A2, LUM, COL6A3, POSTN
## TPM2, ACTA2, THBS2, COL6A2, CTHRC1, COL6A1, C11orf96, NNMT, A2M, IGFBP5
## PC_ 3
## Positive: COL1A2, COL3A1, COL1A1, GZMA, CALD1, DCN, AEBP1, ACTA2, LUM, TPM2
## CD69, POSTN, COL5A2, COL6A2, COL6A3, MGP, MYL9, COL4A2, TAGLN, THBS2
## HOPX, COL5A1, IGFBP5, CTHRC1, COL4A1, ASPN, SULF1, CDH11, BGN, PRRX1
## Negative: LYZ, AIF1, LST1, CTSS, CST3, MAFB, CD163, CTSB, CYBB, CSTA
## TYROBP, NAMPT, S100A9, FCN1, FTL, FCER1G, HLA-DRA, CD68, ACSL1, SLC11A1
## MPEG1, C5AR1, FCGR2A, S100A8, MNDA, AC020656.1, RAB31, FPR1, MS4A7, PLAUR
## PC_ 4
## Positive: CRHBP, IFI27, TM4SF1, DNASE1L3, FCN3, PLPP3, LIFR, FCN2, CLEC4G, PTPRB
## RAMP3, SELENOP, PCAT19, AKAP12, FLT1, CLEC1B, ADGRL4, RAMP2, LDB2, CCL14
## CLDN5, GNG11, OIT3, TSPAN7, IL33, LIMCH1, SLC9A3R2, FAM107A, APOLD1, TFPI2
## Negative: COL1A2, COL3A1, COL1A1, VCAN, LUM, COL6A3, THBS2, COL5A2, DCN, AEBP1
## POSTN, BGN, CTHRC1, TAGLN, COL5A1, ACTA2, FN1, ASPN, SULF1, PRRX1
## TPM2, INHBA, THY1, MXRA8, C1S, ANTXR1, PCOLCE, LGALS1, FAP, EDIL3
## PC_ 5
## Positive: CEACAM5, KRT20, PHGR1, TFF3, FXYD3, TSPAN8, SPINK1, CEACAM6, AGR2, LGALS4
## CLDN3, C19orf33, CDH17, ERBB2, S100A14, KRT23, MUC13, GPX2, TFF1, LGALS3
## FERMT1, GPRC5A, SMIM22, S100A6, LIPH, PGAP3, GRB7, FABP1, TSPAN1, S100P
## Negative: FBLN1, ASPSCR1, IGFBP7, CALCB, DEFA6, CELF4, DEFA5, CNPY1, ID3, STMN1
## RBP1, VGF, HES6, LHX1, MYH7B, CHGB, KLK12, SCGB1D2, HMGN2, FCN3
## CRHBP, DNASE1L3, PMAIP1, UBE2S, SLC1A5, SFN, RGS16, DWORF, RAMP2, GADD45A
saveRDS(crc_merged,"sathe_et_al/merged_crc_normalized_pca.rds")
ElbowPlot(crc_merged, ndims = 50, reduction = "pca") + theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5,size=15),axis.text.y = element_text(size=20))
# Adding tissue information in the metadata * Creating a column in the
seurat object metadata that will contain the tissue information * The
tissue information is contained within the name of the samples. *
Ideally this should have been done when creating the seurat object. *
But we shall now add the column by manipulating the sample names within
the metadata itself. * We shall use the str_split function from the
stringr library, to perform the string maniputaion required.
tissues <- c()
for (splm in crc_merged@meta.data$orig.ident)
{
sVector <- str_split(splm,"_")[[1]]
if (length(sVector) == 2)
{
tissues <- c(tissues, sVector[2])
}
else
{
tissues <- c(tissues, paste0(sVector[2],"_",sVector[3],sep=""))
}
}
crc_merged@meta.data['Tissue'] = tissues
invisible(utils::memory.limit(500000))
pc = 28
res = 1
min_dist = 0.3
n = 30
plot_title <- paste("PC = ",pc," res = ",res," min_dist = ",min_dist," n.neighbour = ",n,sep="")
crc_merged <- FindNeighbors(crc_merged, reduction = "pca", dims = 1:pc)
## Computing nearest neighbor graph
## Computing SNN
crc_merged <- FindClusters(crc_merged, resolution = res)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 38255
## Number of edges: 1388345
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9002
## Number of communities: 34
## Elapsed time: 92 seconds
crc_merged <- RunUMAP(crc_merged, reduction = "pca", dims = 1:pc,min.dist = min_dist,n.neighbors = n)
## 09:08:34 UMAP embedding parameters a = 0.9922 b = 1.112
## 09:08:34 Read 38255 rows and found 28 numeric columns
## 09:08:34 Using Annoy for neighbor search, n_neighbors = 30
## 09:08:34 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 09:09:20 Writing NN index file to temp file C:\Users\Coster\AppData\Local\Temp\RtmpqCsw2O\file4d5028814c6b
## 09:09:20 Searching Annoy index using 1 thread, search_k = 3000
## 09:11:10 Annoy recall = 100%
## 09:11:13 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 09:11:31 Initializing from normalized Laplacian + noise (using irlba)
## 09:12:04 Commencing optimization for 200 epochs, with 1767208 positive edges
## 09:19:53 Optimization finished
Idents(crc_merged) <- "seurat_clusters"
p1<- DimPlot(crc_merged, reduction = "umap", label = TRUE,pt.size = 0.5,label.size = 5,raster=FALSE) + ggtitle(plot_title)
p2 <- FeaturePlot(crc_merged,features = c("PTPRC"),pt.size = 1,raster=FALSE) + scale_color_viridis(option = "D") + ggtitle("Immune Cells; PTPRC")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p3 <- FeaturePlot(crc_merged,features = c("KRT18"),pt.size = 1,raster=FALSE) + scale_color_viridis(option = "D") + ggtitle("Epithelial; KRT18")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p4 <- FeaturePlot(crc_merged,features = c("PECAM1"),pt.size = 1,raster=FALSE) + scale_color_viridis(option = "D") + ggtitle("Endothelial; PECAM1")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p5 <- FeaturePlot(crc_merged,features = c("ACTA2"),pt.size = 1,raster=FALSE) + scale_color_viridis(option = "D") + ggtitle("Fibroblast; ACTA2")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
Idents(crc_merged) <- 'Tissue'
p6<- DimPlot(crc_merged, reduction = "umap", label = F,pt.size = 0.5,label.size = 8,raster=FALSE) + ggtitle("Tissue Type")
grid.arrange(p1,p2,p3,p4,p5,p6, ncol =2 )
#Saving satisfied ‘umapped’ data
saveRDS(crc_merged,"sathe_et_al/merged_crc_normalized_pca_umapped.rds")
Note: The options in this function is not limited and further modification is possible. Please follow the ggplot documentation or google search ;)
#Creating the dataframe that will be pass into plotting function
umap_emb <- as.data.frame(crc_merged@reductions$umap@cell.embeddings)
umap_emb['Tissue'] = crc_merged@meta.data$Tissue
umap_emb['Cluster'] = crc_merged@meta.data$seurat_clusters
# Creating a function that will a customize umap for color, point size
customUmap <- function(df,column='Cluster',pt.size=2,x.angle=90,x.hjust=0.5,x.vjust=0.5,x.tick.size=30,y.angle=0,
y.hjust=0.5,y.vjust=0.5,y.tick.size=30,x.color='black',y.color='black',
x.ranges=seq(-20,20,by=5),y.ranges=seq(-20,20,by=5),lab.size=20,custom_col=c(),
lab.col = 'black',clust.label.size = 5,lg.ncol=1, lg.sym.size=6, lg.text.size=15,label.clust=T,
lg.pos="right")
{
# Function arguments:
# df: The data frame that contains the umap information and metadata
# column: The column name in the dataframe that will be used the plot colors on the umap
# pt.size: Size of the points in the graph; default size = 2
# x.angle: The angle of the X-axis label; default angle = 90
# y.angle: The angle of the Y-axis label; default angle =0
# x.hjust: Horizontal spacing of the X-axis label; default = 0.5
# y.hjust: Horizontal spacing of the Y-axis label; default = 0.5
# y.vjust: Vertical spacing of Y-axis label; default = 0.5
# x.tick.size: X-axis label size; default = 30
# y.tick.size: Y-axis label size; default = 30
# x.color and y.color: Add color to X and Y axis ticks; default = black
# x.ranges and y.ranges: Define the range and interval of X and Y axis; default seq(-20,20,by=5)
# lab.size: Size of the X and Y axis title; default = 20
# Custom_col: A vector of colors that should be equal to the number of clusters; default = None
# lab.col: Color of the X and Y axis title; default = black
# clust.label.size: Size of the cluster labels; default = 5
# lg.ncol: Number of columns for the legend; default = 1
# lg.sym.size: Size of the symbol for the legend; default = 6
# lg.text.size: Size of the legends; Default = 15
# label.clust: label Cluster; Default = True
# lg.pos: Position of the legend; default = "right"; can be "bottom", "left", "top"
#Basic plot with point size changes
gplt <- function( variable, pt.size, alpha) {
ggplot(umap_emb, aes(x = UMAP_1, y = UMAP_2 , col = !! sym(variable))) +
geom_point(size=pt.size,alpha=alpha)
}
p <- gplt(column,pt.size,0.8)
# Change x and y axis values
# Change ticks and label colors
# change plot background
p <- p + theme(axis.text.x = element_text(angle = x.angle, hjust = x.hjust, vjust = x.vjust,size = x.tick.size, colour = x.color),
axis.text.y = element_text(angle = y.angle, hjust = y.hjust, vjust = y.vjust,size=y.tick.size,colour = y.color),
axis.title = element_text(size = lab.size),
panel.background = element_blank(), axis.line = element_line(colour = "black"),
legend.position=lg.pos, legend.title.align=0.5 )
p <- p + scale_y_continuous(breaks=x.ranges)
p <- p + scale_x_continuous(breaks=y.ranges)
#Adding colors for each cluster
if (length(custom_col) == 0)
{
p <- p + scale_color_manual(values = distinctColorPalette(length(unique(umap_emb$Cluster))))
}
else
{
p <- p + scale_color_manual(values = custom_col)
}
### Add label
if (label.clust == T)
{
data <- umap_emb %>% group_by(Cluster) %>% select(UMAP_1,UMAP_2)%>% summarize_all(median)
if (lab.col == 'black')
{
p <- p + geom_text(data = data,aes(label = Cluster),colour='black',size = clust.label.size)
}
else
{
p <- p + geom_label(data = data,aes(label = Cluster),size = clust.label.size)
}
}
### Working with legends
p <- p + guides(color = guide_legend(ncol = lg.ncol, override.aes = list(size=lg.sym.size)))
p <- p + theme(legend.key.size = unit(1, 'cm'), #change legend key size
legend.key.height = unit(1, 'cm'), #change legend key height
legend.key.width = unit(1, 'cm'), #change legend key width
legend.title = element_blank(), #change legend title font size
legend.text = element_text(size=lg.text.size)) #change legend text font size
return(p)
}
# Making UMAP with random color set 1
customUmap(umap_emb)
## Adding missing grouping variables: `Cluster`
# Making UMAP with random color set 2
customUmap(umap_emb)
## Adding missing grouping variables: `Cluster`
# We can fix the cluster color by either choosing our own color for each
cluster # or, by saving the randomly generated color into a vector and
push vector into the function.
# Making UMAP with fix color method 1.
colors <- c("#E2B93D","#DA3FB7","#81C9E6","#DCE3B5","#E5B3E2","#C9E7E0",
"#DD74B6","#BEAD9B","#A7A763","#E1E07E","#B3EBB3","#609DD9",
"#6AE552","#D15576","#6B72DF","#78735B","#CC6DE8","#ECD4C3",
"#E6A271","#CD95E8","#9689A1","#7D3BE4","#695A91","#70B671",
"#ADE87A","#61E7A6","#ABB5EC","#D5E73E","#E2D1E4","#67B8B3",
"#71E8D9","#E49EA7","#DE6644","#D934E9")
customUmap(umap_emb,custom_col = colors)
## Adding missing grouping variables: `Cluster`
# Making UMAP with fix color method 2.
colors <-distinctColorPalette(length(unique(umap_emb$Cluster)))
customUmap(umap_emb,custom_col = colors)
## Adding missing grouping variables: `Cluster`
# Making UMAP with fix color method 2.
customUmap(umap_emb,custom_col = colors,lg.ncol = 2)
## Adding missing grouping variables: `Cluster`
# We can also change the location of the legend using the lg.pos option.
# The legend is placed on the right by default. but it can also be
placed on left, top and bottom
# Making UMAP with fix color method 2.
customUmap(umap_emb,custom_col = colors,lg.ncol = 16,lg.pos = 'bottom')
## Adding missing grouping variables: `Cluster`
customUmap(umap_emb,column = "Tissue",lab.col = 'black',clust.label.size = 8, lg.ncol = 1, custom_col = c("#1ECBE1","#E11ECB","#CBE11E"),label.clust = F )